---
title: "Student Debt Project"
author: "William Harrison and Jacob Smith"
output: html_document
date: "2024-06-07"
---

```{r setup, include=FALSE}
# Echo the source of every chunk in the knitted document by default.
knitr::opts_chunk$set(echo = TRUE)
```

## R Markdown

## Load Packages and Data
```{r load-packages}
library(tidyverse)
library(ggeffects)
library(MASS)
library(grid)
library(gridExtra)
# Change path here based on where you've saved the data.
poll_path <- "D:/poll_data.csv"

# Read the CSV once. `poll` is recoded in the chunks below; `poll_rv_stats`
# keeps the responses as read, for the summary-statistics chunk.
poll <- read_csv(poll_path)
poll_rv_stats <- poll
```

## Recoding Variables

```{r recoding}
# Map each response to its 1-based position in `labels`, as a double.
# Responses that are missing or not listed in `labels` become NA.
position_in <- function(x, labels) {
  as.numeric(match(x, labels))
}

# `race` responses coded as Black, alone or in combination.
black_race_labels <- c(
  "Black or African American",
  "Black or African American,Asian",
  "White,Black or African American",
  "White,Black or African American,American Indian or Alaska Native",
  "White,Black or African American,American Indian or Alaska Native,Asian,Native Hawaiian or Other Pacific Islander"
)

poll <- poll %>%
  # Registered voters with both race and Hispanic/Latine status reported.
  filter(registered_voter == "Yes") %>%
  drop_na(race, hispanic_latine) %>%
  mutate(
    # 1 = Very Liberal ... 5 = Very Conservative; other answers are NA.
    ideology_numeric = position_in(
      ideology,
      c("Very Liberal", "Liberal", "Moderate", "Conservative",
        "Very Conservative")
    ),
    # 0 = White and not Hispanic/Latine, 1 = everyone else.
    bipoc = as.numeric(!(race == "White" & hispanic_latine == "No")),
    # Vote-choice outcomes: Biden vs. any other answer, vs. Trump, vs. RFK Jr.
    biden_non_ranked = as.numeric(election_24 == "Joe Biden"),
    biden_trump = case_when(
      election_24 == "Joe Biden" ~ 1,
      election_24 == "Donald Trump" ~ 0
    ),
    biden_rfk = case_when(
      election_24 == "Joe Biden" ~ 1,
      election_24 == "Robert F. Kennedy Jr." ~ 0
    ),
    gender_male = as.numeric(gender == "Male"),
    # 1 = Democratic, 2 = Independent, 3 = Republican.
    pid_3 = position_in(
      party_id,
      c("Democratic Party", "I identify as Independent", "Republican Party")
    ),
    # Biden's rank 1-5; a missing rank is coded 6.
    biden_rank = case_when(
      election_24_ranked_1 == 1 ~ 1,
      election_24_ranked_1 == 2 ~ 2,
      election_24_ranked_1 == 3 ~ 3,
      election_24_ranked_1 == 4 ~ 4,
      election_24_ranked_1 == 5 ~ 5,
      is.na(election_24_ranked_1) ~ 6
    ),
    # 1 = much more likely ... 5 = much less likely.
    loans_change_vote = position_in(
      loans_forgive_vote_biden,
      c("Much more likely", "Slightly more likely",
        "This would not influence my vote", "Slightly less likely",
        "Much less likely")
    ),
    # Loan-amount bracket, 1 (lowest) to 9 (highest).
    amount_loans = position_in(
      how_much_loans,
      c("$0-$5,000", "$5,001-$10,000", "$10,001-$20,000", "$20,001-$30,000",
        "$30,001-$50,000", "$50,001-$75,000", "$75,001-$100,000",
        "$100,001-$150,000", ">$150,000")
    ),
    # Lowest bracket counts as "no debt" only when loans_informed is "n/a".
    have_debt = case_when(
      amount_loans == 1 & loans_informed == "n/a" ~ 0,
      amount_loans == 1 & loans_informed != "n/a" ~ 1,
      amount_loans > 1 ~ 1
    ),
    year = position_in(
      class_yr,
      c("First year", "Sophomore", "Junior", "Senior")
    ),
    gender_3 = case_when(
      gender == "Female" ~ "Female",
      gender == "Male" ~ "Male",
      gender %in% c("Non-binary", "Other [enter here]") ~ "Non-binary/Oth"
    ),
    # Race/ethnicity indicators; `other` is 1 when none of white, black2,
    # hispanic_latine2 is 1.
    white = as.numeric(race == "White" & hispanic_latine == "No"),
    black = as.numeric(race %in% black_race_labels),
    black2 = as.numeric(black == 1 & hispanic_latine == "No"),
    hispanic_latine2 = as.numeric(hispanic_latine == "Yes" & black == 0),
    other = as.numeric(!(white == 1 | black2 == 1 | hispanic_latine2 == 1))
  )



# Ordered-level factor versions of three survey questions, used for model
# terms and plot fills. Answers not listed in `levels` become NA.
poll <- poll %>%
  mutate(
    loans_factor = factor(
      biden_loans_enough,
      levels = c("Too little", "Enough", "Too much")
    ),
    support_factor = factor(
      loans_forgive_vote_biden,
      levels = c("Much more likely", "Slightly more likely", "This would not influence my vote", "Slightly less likely", "Much less likely")
    ),
    ideology_factor = factor(
      ideology,
      levels = c("Very Liberal", "Liberal", "Moderate", "Conservative", "Very Conservative", "Unsure")
    )
  )
```

```{r regressions}
# Regression 2024 Election Results
# Model 1: logit of choosing Biden (1) vs. any other answer (0) on views of
# his student-loan actions, with demographic and political controls.
model_1 <- glm(
  biden_non_ranked ~ loans_factor + bipoc + ideology_numeric + gender_3 +
    pid_3 + year,
  family = binomial,
  data = poll
)
summary(model_1)
logLik(model_1)

# Predicted probabilities across the levels of loans_factor.
# This call previously passed `"bipoc"[0]` as a second term. Indexing a
# character vector with 0 returns character(0), so that term was silently
# dropped and only "loans_factor" ever reached ggpredict(). Requesting the
# single term explicitly keeps the figure unchanged and matches what is
# actually computed.
# NOTE(review): if the figure was meant to condition on bipoc = 0, the
# ggeffects syntax is terms = c("loans_factor", "bipoc [0]") - confirm.
prediction <- ggpredict(model_1, terms = "loans_factor")

plot(prediction) +
  labs(
    x = "Views on Biden Student Loan Actions",
    y = "Percent Voting Biden",
    caption = "Note: Variables Set at Mean or Modal Value"
  ) +
  ggtitle("Figure 1: Predicted Probability of Voting for Joe Biden") +
  scale_colour_grey() +
  # Centre the title and the caption.
  theme(
    plot.title = element_text(hjust = 0.5),
    plot.caption = element_text(hjust = 0.5)
  )

# Model 2: Biden (1) vs. Trump (0); other answers are NA and dropped by glm.
model_2 <- glm(
  biden_trump ~ loans_factor + bipoc + ideology_numeric + gender_3 +
    pid_3 + year,
  family = binomial,
  data = poll
)
summary(model_2)
logLik(model_2)

# Model 3: Biden (1) vs. Robert F. Kennedy Jr. (0), same specification.
model_3 <- glm(
  biden_rfk ~ loans_factor + bipoc + ideology_numeric + gender_3 +
    pid_3 + year,
  family = binomial,
  data = poll
)
summary(model_3)
logLik(model_3)

# With Multiple Race/Ethnicity Categories
# Same three outcomes, with the single bipoc indicator replaced by black2,
# hispanic_latine2 and other.

model_1_multiple <- glm(
  biden_non_ranked ~ loans_factor + ideology_numeric + black2 +
    hispanic_latine2 + other + gender_3 + pid_3 + year,
  family = binomial,
  data = poll
)
summary(model_1_multiple)
logLik(model_1_multiple)

model_2_multiple <- glm(
  biden_trump ~ loans_factor + black2 + hispanic_latine2 + other +
    ideology_numeric + gender_3 + pid_3 + year,
  family = binomial,
  data = poll
)
summary(model_2_multiple)
logLik(model_2_multiple)

model_3_multiple <- glm(
  biden_rfk ~ loans_factor + black2 + hispanic_latine2 + other +
    ideology_numeric + gender_3 + pid_3 + year,
  family = binomial,
  data = poll
)
summary(model_3_multiple)
logLik(model_3_multiple)
```

```{r count-black-students}

# Right-hand-side variables shared by the three multi-category models.
model_covariates <- c(
  "loans_factor", "black2", "hispanic_latine2", "other",
  "ideology_numeric", "gender_3", "pid_3", "year"
)

# Count respondents by black2 among rows with no missing value on `outcome`
# (a column name given as a string) or on any of the shared covariates.
count_black2_complete <- function(outcome) {
  poll %>%
    drop_na(all_of(c(outcome, model_covariates))) %>%
    count(black2)
}

# Model 1 Count of Black Students
count_black2_complete("biden_non_ranked")

# Model 2 Count of Black Students
count_black2_complete("biden_trump")

# Model 3 Count of Black Students
count_black2_complete("biden_rfk")
```

```{r debt-vs-not-debt}
# Stats for Debt vs. Not Debt
# Cross-tabulate views on Biden's loan actions, debt status and Biden vote
# among respondents with all three observed. count() returns an ungrouped
# table, so the result is not left grouped and no summarise() grouping
# message is printed into the knitted report.
poll %>%
  drop_na(biden_non_ranked, loans_factor, have_debt) %>%
  count(loans_factor, have_debt, biden_non_ranked)
```

``` {r persuadable}
# Figure 2: among respondents who did not rank Biden first and answered the
# loan-forgiveness vote question, the share giving each answer, by Biden's
# rank (6 = Biden left unranked).
poll %>%
  drop_na(loans_forgive_vote_biden) %>%
  filter(biden_rank != 1) %>%
  count(biden_rank, support_factor) %>%
  # Group by rank explicitly so shares are computed within each rank, rather
  # than relying on summarise() implicitly dropping the last grouping
  # variable. Each bar therefore sums to 1.
  group_by(biden_rank) %>%
  mutate(perc = n / sum(n)) %>%
  ungroup() %>%
  # NOTE(review): perc is a proportion in [0, 1] although the axis label
  # says "Percentage".
  ggplot(aes(x = biden_rank, y = perc, fill = support_factor)) +
  geom_col() +
  scale_fill_grey() +
  scale_x_continuous(
    breaks = c(2, 3, 4, 5, 6),
    labels = c("2", "3", "4", "5", "Unranked")
  ) +
  theme(plot.title = element_text(hjust = 0.5)) +
  labs(
    x = "Biden Rank",
    y = "Percentage of Respondents",
    fill = "Support Biden if Action on Loans?",
    title = "Figure 2: Rank and Support for Biden Based on Action on Loans"
  )
```

```{r plot-ideology}
# Plot by Ideology
# Figure 3: among respondents who did not rank Biden first and answered the
# loan-forgiveness vote question, the ideological make-up of each Biden rank
# (6 = Biden left unranked).
poll %>%
  drop_na(loans_forgive_vote_biden) %>%
  filter(biden_rank != 1) %>%
  count(biden_rank, ideology_factor) %>%
  # Group by rank explicitly so shares are computed within each rank, rather
  # than relying on summarise() implicitly dropping the last grouping
  # variable. Each bar therefore sums to 1.
  group_by(biden_rank) %>%
  mutate(perc = n / sum(n)) %>%
  ungroup() %>%
  # NOTE(review): perc is a proportion in [0, 1] although the axis label
  # says "Percentage".
  ggplot(aes(x = biden_rank, y = perc, fill = ideology_factor)) +
  geom_col() +
  scale_fill_grey() +
  scale_x_continuous(
    breaks = c(2, 3, 4, 5, 6),
    labels = c("2", "3", "4", "5", "Unranked")
  ) +
  theme(plot.title = element_text(hjust = 0.5)) +
  labs(
    x = "Biden Rank",
    y = "Percentage of Respondents",
    fill = "Ideology of Respondents",
    title = "Figure 3: Ideology and Biden Rank"
  )

```

``` {r biden-second-who-first}
## When Biden is Second Choice, Who Was First Choice and Does that Impact Support for Relief
# Restrict to respondents who ranked Biden second and answered the
# loan-forgiveness vote question, label their first choice from whichever
# ranking column equals 1, then tabulate against views on Biden's loan
# actions.
poll %>%
  filter(!is.na(loans_forgive_vote_biden), biden_rank == 2) %>%
  mutate(
    first_choice = case_when(
      election_24_ranked_2 == 1 ~ "RFK",
      election_24_ranked_3 == 1 ~ "Trump",
      election_24_ranked_4 == 1 ~ "Libertarian",
      election_24_ranked_5 == 1 ~ "Green"
    )
  ) %>%
  count(first_choice, loans_factor)
```

```{r sum-stats}
# Registered voters from the unrecoded copy of the data. Unlike `poll`, rows
# with missing race or Hispanic/Latine status are kept.
poll_rvs <- poll_rv_stats %>%
  filter(registered_voter == "Yes")

# Sample shares for race/ethnicity and commuter status.
poll_rvs %>%
  mutate(
    # 1 = White and not Hispanic/Latine. This was previously coded 0/1 the
    # other way round (the bipoc coding), so mean_white reported the
    # non-white share.
    white = ifelse(race == "White" & hispanic_latine == "No", 1, 0),
    # Single-race answers only; multiracial responses are coded 0 here.
    black = ifelse(race == "Black or African American", 1, 0),
    hispanic_latine_num = ifelse(hispanic_latine == "Yes", 1, 0),
    asian = ifelse(race == "Asian", 1, 0),
    commuter_num = ifelse(commuter == "Yes", 1, 0)
  ) %>%
  summarize(
    mean_white = mean(white, na.rm = TRUE),
    mean_black = mean(black, na.rm = TRUE),
    mean_hisp = mean(hispanic_latine_num, na.rm = TRUE),
    mean_asian = mean(asian, na.rm = TRUE),
    mean_commuter_num = mean(commuter_num, na.rm = TRUE)
  )

# Share of registered voters by debt status, class year and gender.
# Shares are n / sum(n), i.e. divided by the number of rows in poll_rvs
# (missing answers form their own row). The denominator was previously
# hard-coded as 336, which silently goes stale if the data or the filter
# changes.

poll_rvs %>%
  mutate(
    # Loan-amount bracket, 1 (lowest) to 9 (highest).
    amount_loans = case_when(
      how_much_loans == "$0-$5,000" ~ 1,
      how_much_loans == "$5,001-$10,000" ~ 2,
      how_much_loans == "$10,001-$20,000" ~ 3,
      how_much_loans == "$20,001-$30,000" ~ 4,
      how_much_loans == "$30,001-$50,000" ~ 5,
      how_much_loans == "$50,001-$75,000" ~ 6,
      how_much_loans == "$75,001-$100,000" ~ 7,
      how_much_loans == "$100,001-$150,000" ~ 8,
      how_much_loans == ">$150,000" ~ 9
    ),
    # Lowest bracket counts as "no debt" only when loans_informed is "n/a".
    have_debt = case_when(
      amount_loans == 1 & loans_informed == "n/a" ~ 0,
      amount_loans == 1 & loans_informed != "n/a" ~ 1,
      amount_loans > 1 ~ 1
    )
  ) %>%
  count(have_debt) %>%
  mutate(perc = n / sum(n))

poll_rvs %>%
  count(class_yr) %>%
  mutate(perc = n / sum(n))

poll_rvs %>%
  count(gender) %>%
  mutate(perc = n / sum(n))


```

```{r sum-stats-tab-1-appendix}
# 0/1 indicators for the appendix summary table. Each loans_* dummy marks
# one level of loans_factor and is NA where loans_factor is NA.
poll <- poll %>%
  mutate(
    loans_little = as.numeric(loans_factor == "Too little"),
    loans_enough = as.numeric(loans_factor == "Enough"),
    loans_much = as.numeric(loans_factor == "Too much"),
    # 1 = Non-binary or Other, 0 = Female or Male, NA for any other answer.
    non_binary = case_when(
      gender %in% c("Female", "Male") ~ 0,
      gender %in% c("Non-binary", "Other [enter here]") ~ 1
    )
  )

# Variables reported in the appendix table, in table order.
appendix_vars <- c(
  "biden_non_ranked", "biden_trump", "biden_rfk",
  "loans_little", "loans_enough", "loans_much",
  "bipoc", "ideology_numeric", "gender_male", "non_binary",
  "pid_3", "year"
)

# Summary (min, quartiles, mean, max, NA count) for each variable.
for (col_name in appendix_vars) {
  print(summary(poll[[col_name]]))
}

# Standard deviation for each variable, ignoring missing values.
for (col_name in appendix_vars) {
  print(sd(poll[[col_name]], na.rm = TRUE))
}
```
